Linear Regression

Imports


In [75]:
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
plt.rcParams['figure.figsize'] = (20.0, 10.0)

Data


In [53]:
# Load the Boston housing dataset.
boston = load_boston()
X, y = boston['data'], boston['target']
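
load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the cell above assumes an older scikit-learn. On newer releases, a rough equivalent is the fetch-it-yourself snippet from scikit-learn's deprecation notice (assuming network access to the CMU mirror), which produces the same X and y:

In [ ]:
# Fallback when load_boston is unavailable (scikit-learn >= 1.2).
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]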

In [54]:
X.shape


Out[54]:
(506, 13)

In [55]:
# Keep the first 30 rows of feature column 1 (ZN) for a small univariate example.
X = X[:30, 1]

In [62]:
# Match the target to the same 30 rows.
y = y[:30]

In [56]:
X.shape


Out[56]:
(30,)

In [57]:
X


Out[57]:
array([ 18. ,   0. ,   0. ,   0. ,   0. ,   0. ,  12.5,  12.5,  12.5,
        12.5,  12.5,  12.5,  12.5,   0. ,   0. ,   0. ,   0. ,   0. ,
         0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,
         0. ,   0. ,   0. ])

In [59]:
# Stack a row of ones (intercept term) on top of the feature values.
X = np.array([np.ones(X.shape[0]), X])

In [67]:
# Transpose so each row is [1, x_i], giving a (30, 2) design matrix.
X = X.T

In [70]:
X.shape


Out[70]:
(30, 2)
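
The last two cells build the design matrix by stacking a row of ones with the feature and transposing; np.column_stack does the same thing in one step (a sketch; X_feat and X_design are illustrative names, not used later):

In [ ]:
# Intercept column of ones followed by the ZN feature, giving shape (30, 2).
X_feat = boston['data'][:30, 1]
X_design = np.column_stack((np.ones(X_feat.shape[0]), X_feat))
X_design.shape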

Functions


In [96]:
def cost_function(X, Y, B=None):
    # Mean squared error cost: J(B) = sum((X.B - Y)^2) / (2 * m).
    if B is None:
        B = np.zeros(X.shape[1])

    m = len(Y)
    J = np.sum((X.dot(B) - Y) ** 2) / (2 * m)
    return J
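
cost_function is the usual mean squared error cost, J(B) = sum((X.B - Y)^2) / (2m). A tiny hand-checkable example (X_toy and y_toy are made-up numbers, not notebook data):

In [ ]:
# y = 2 + 3x exactly, so B = [2, 3] gives zero cost and the zero vector gives
# (4 + 25 + 64) / (2 * 3) = 15.5.
X_toy = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])
y_toy = np.array([2.0, 5.0, 8.0])
cost_function(X_toy, y_toy, np.array([2.0, 3.0])), cost_function(X_toy, y_toy)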

In [131]:
def gradient_descent(X, Y, theta=None, alpha=0.0001, iterations=1000):
    # Batch gradient descent on the mean squared error cost.
    if theta is None:
        theta = np.zeros(X.shape[1])

    m = len(Y)
    cost_history = [0] * iterations

    for iteration in range(iterations):
        hypothesis = X.dot(theta)          # predictions X.theta
        loss = hypothesis - Y              # residuals
        gradient = X.T.dot(loss) / m       # gradient of J with respect to theta
        theta = theta - alpha * gradient   # update step
        cost_history[iteration] = cost_function(X, Y, theta)

    return theta, cost_history
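
At this point X and y are still the 30-row univariate slice, so the loop can be sanity-checked against NumPy's closed-form least-squares solution (a sketch; theta_ls and theta_gd are illustrative names):

In [ ]:
# Closed-form least squares for comparison; with a suitable alpha and enough
# iterations, gradient descent should drift toward the same coefficients.
theta_ls, *_ = np.linalg.lstsq(X, y, rcond=None)
theta_gd, history = gradient_descent(X, y, alpha=0.0001, iterations=1000)
theta_ls, theta_gd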

In [132]:
# Rebuild X and y from the full dataset and prepend a column of ones for the intercept.
X = boston.data
y = boston.target
m = X.shape[0]
X = np.column_stack((np.ones(m), X))   # shape (506, 14)
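
The raw Boston columns differ in scale by orders of magnitude (TAX is in the hundreds while NOX stays below one), which makes a single learning rate like 0.0001 hard to tune for gradient descent. A common workaround is to standardize the non-intercept columns first; a minimal sketch on the X built above (X_scaled is an illustrative name, not used elsewhere):

In [ ]:
# Standardize every column except the leading intercept column of ones,
# so a single learning rate works reasonably across features.
X_scaled = X.copy()
X_scaled[:, 1:] = (X[:, 1:] - X[:, 1:].mean(axis=0)) / X[:, 1:].std(axis=0)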

In [133]:
cost_function(X, y)


Out[133]:
296.07345849802368

In [136]:
newB, cost_history = gradient_descent(X, y, iterations=1000)
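
cost_history gives a quick way to check whether the chosen alpha is actually decreasing the cost or diverging; a minimal plot, using the matplotlib setup imported at the top:

In [ ]:
# The cost should fall roughly monotonically when alpha is small enough.
plt.plot(cost_history)
plt.xlabel('iteration')
plt.ylabel('cost J')
plt.show()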

In [ ]: